package com.hylanda.entity.name;

import java.io.File;
import java.io.IOException;
import java.util.*;
import java.util.Map.Entry;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.hylanda.dictionary.DictFlag;
import com.hylanda.dictionary.UserDictionary;
import com.hylanda.segmentor.BasicSegmentor;
import com.hylanda.segmentor.common.SegOption;
import com.hylanda.segmentor.common.SegResult;
import com.hylanda.segmentor.common.Token;
import com.hylanda.tools.CommonTools;

public class HLNameProcess {

	/** Suffix appended to a source field name for the person-name result field. */
	private static final String FIELD_SUFFIX_NAME = "_names";
	/** Suffix appended to a source field name for the organization-name result field. */
	private static final String FIELD_SUFFIX_ORG = "_orgs";

	/** Fields from which person names should be extracted. */
	private Set<String> dealFieldNameSet = new HashSet<>();
	/** Fields from which organization names should be extracted. */
	private Set<String> dealFieldOrgSet = new HashSet<>();
	/** Union of the two field sets; drives the per-document processing loop. */
	private Set<String> unionSet = new HashSet<>();

	private BlackListMgr blackMgrInstance = BlackListMgr.getInstance();

	private BasicSegmentor segmentor_name = new BasicSegmentor();
	private UserDictionary userDictObj = null;
	private SegOption option = new SegOption();
	// true once a user dictionary (white-list) has been loaded into the segmentor
	private boolean usrFillDict = false;
	// true once the segmentor holds a reference to a static/core dictionary that
	// must be released in uninit()
	private boolean isUseCoreDict = false;

	/**
	 * Initializes the processor: records the target fields, loads the built-in
	 * person-name black list (plus user-supplied black words), and sets up the
	 * segmentor (optionally with a white-word user dictionary).
	 *
	 * @param nameFieldSet fields to extract person names from
	 * @param orgFieldSet  fields to extract organization names from
	 * @param id           identifier used when building the temporary user dict
	 * @param usrdictPath  base path of the user dictionary
	 * @param savePath     directory for the generated temporary dictionary file
	 * @param whiteWords   optional white-list words (blank means none)
	 * @param blackWords   extra black-list words merged into the built-in list
	 * @param errBuff      receives a human-readable message on failure
	 * @return true on success (trivially true when there are no target fields)
	 */
	public boolean init(Set<String> nameFieldSet, Set<String> orgFieldSet, String id, String usrdictPath, String savePath,
			String whiteWords, String blackWords, StringBuffer errBuff) {

		dealFieldNameSet.addAll(nameFieldSet);
		dealFieldOrgSet.addAll(orgFieldSet);

		unionSet.addAll(dealFieldNameSet);
		unionSet.addAll(dealFieldOrgSet);
		boolean ret = false;

		if (!unionSet.isEmpty()) {
			blackMgrInstance.init(getClass().getClassLoader().getResourceAsStream("blacklist_Name.txt"), blackWords);

			ret = initseg(savePath, usrdictPath, id, whiteWords, errBuff);
		} else {
			// Nothing to process; initialization is a no-op success.
			ret = true;
		}

		return ret;
	}

	/**
	 * Releases segmentor dictionary resources acquired by {@link #init}.
	 * Safe to call multiple times; resets internal flags so the instance can be
	 * re-initialized.
	 */
	public void uninit(){
		if(isUseCoreDict){
			if(usrFillDict){
				segmentor_name.unloadUserDict();
				segmentor_name.unuseStaticCoreDictionary();
			}else{
				segmentor_name.unuseStaticDictionary();
			}
			isUseCoreDict = false;
			// Must be reset too: otherwise a later init()/uninit() cycle without a
			// white list would wrongly take the unloadUserDict() branch.
			usrFillDict = false;
		}
	}

	/**
	 * Runs entity recognition over every configured field present in dataMap and
	 * stores the JSON-encoded results back into dataMap under
	 * {@code field + "_names"} / {@code field + "_orgs"}.
	 *
	 * @param dataMap document fields keyed by field name; augmented in place
	 * @return true if ANY field yielded at least one entity
	 */
	public boolean doRecognition(Map<String, String> dataMap) {

		boolean ret = false;

		for (String field : unionSet) {
			String content = dataMap.get(field);

			if (StringUtils.isEmpty(content)) {
				continue;
			}

			// Accumulate with OR: a later field producing nothing must not erase
			// the success of an earlier one.
			ret |= getEntityName(field, content, dealFieldNameSet.contains(field), dealFieldOrgSet.contains(field),
					dataMap);
		}

		return ret;
	}

	/**
	 * Initializes the segmentor used for person/organization recognition.
	 * Without white words it simply attaches the static dictionary; with white
	 * words it builds a temporary user dictionary (nature "nr"), loads it on top
	 * of the static core dictionary, and then deletes the temporary file.
	 *
	 * @return true on success; on failure an explanation is appended to errBuff
	 */
	private boolean initseg(String savePath, String usrdictPath, String id, String whiteWords, StringBuffer errBuff) {

		boolean ret = false;

		if(StringUtils.isBlank(whiteWords)){
			// No white list: the shared static dictionary is sufficient.
			segmentor_name.useStaticDictionary();
			isUseCoreDict = true;
			ret = true;
		}else{
			String usrdict = CommonTools.buildUsrDict(usrdictPath, savePath, id, whiteWords, "nr");
			if(usrdict == null){
				errBuff.append("usrdict is null");
				return false;
			}

			segmentor_name.useStaticCoreDictionary();
			ret = segmentor_name.loadUserDict(usrdict);
			if (!ret) {
				errBuff.append("seg load name usr dict err!usrdict path:" + usrdict);
				// Roll back the core-dictionary reference taken above.
				segmentor_name.unuseStaticCoreDictionary();
				return ret;
			}else{
				isUseCoreDict = true;
				usrFillDict = true;
			}

			// Best-effort cleanup of the temporary white-list file. A failed
			// delete is recorded but deliberately NOT treated as fatal: the
			// dictionary is already loaded and usable.
			try {
				FileUtils.forceDelete(new File(usrdict));
			} catch (IOException e) {
				errBuff.append("seg delete name usr dict err!");
			}
		}

//		option.doPosTagging = true; // POS tagging would raise name-recognition accuracy

		// Keep a handle to the user dictionary for nature-flag lookups later.
		userDictObj = segmentor_name.getUserDict();

		return ret;

	}

	/**
	 * Segments the given text.
	 *
	 * @param content text to segment; may be null/empty
	 * @return the segmentation result, or null for empty input
	 */
	private SegResult getSegResult(String content) {

		if (StringUtils.isEmpty(content)) {
			return null;
		}

		return segmentor_name.segment(content, option);

	}

	/**
	 * Decides whether a token is an acceptable person name.
	 * Words from the user dictionary are accepted when flagged NATURE_NR;
	 * words recognized by the core segmentor are additionally filtered by
	 * length, fuzzy appellations ("小X"/"老X"/"X某"), and the black list.
	 *
	 * @return true if the token should be counted as a person name
	 */
	private boolean isSegPeopleResult(Token token) {

		boolean ret = false;
		String word = token.wordStr;

		if (token.wordIdInUsrDict > 0) {
			// White-listed word: trust the user dictionary's nature flag.
			int userNatureFlag = userDictObj.getNatureFlag(token.wordIdInUsrDict);
			if ((userNatureFlag & DictFlag.NATURE_NR) != 0) {
				ret = true;
			}

		} else if ((token.natureFlag & DictFlag.NATURE_NR) != 0) {
			if ((word.length() == 2 && (word.startsWith("小") || word.startsWith("老") || word.endsWith("某")))
					|| word.length() == 1 || blackMgrInstance.isBlackWord(word)) {
				// Filtered out: too short, a fuzzy appellation, or black-listed.
			} else {
				ret = true;
			}
		}

		return ret;
	}

	/**
	 * Decides whether a token is an organization name (nature flag NATURE_NT).
	 */
	private boolean isSegOrgResult(Token token) {
		return (token.natureFlag & DictFlag.NATURE_NT) != 0;
	}

	/**
	 * Extracts person and/or organization names from one field's content and,
	 * when anything was found, writes a JSON result object into dataMap under
	 * the corresponding suffixed field name.
	 *
	 * @param field       source field name
	 * @param content     field text to analyze
	 * @param nameExtract whether to extract person names from this field
	 * @param orgExtract  whether to extract organization names from this field
	 * @param dataMap     receives the JSON-encoded results
	 * @return true if at least one name or organization was found
	 */
	private boolean getEntityName(String field, String content, boolean nameExtract, boolean orgExtract,
			Map<String, String> dataMap) {

		SegResult segResult = getSegResult(content);
		if (segResult == null) {
			return false;
		}

		boolean ret = false;

		Map<String, Integer> orgCountMap = new HashMap<>();
		Map<String, Integer> nameCountMap = new HashMap<>();
		Map<String, Double> weightMap = new HashMap<>();

		// Keyword weights are attached to the entities they match in buildResult.
		List<Token> keywords = segResult.getKeywordsList();
		for (Token keyword : keywords) {
			weightMap.put(keyword.wordStr, keyword.weight);
		}

		// Walk the token chain and count occurrences per entity string.
		Token token = segResult.getFirst();
		while (token != null) {
			if (nameExtract && isSegPeopleResult(token)) {
				SegmentProcess.addResult(token.wordStr, nameCountMap);
			}

			if (orgExtract && isSegOrgResult(token)) {
				SegmentProcess.addResult(token.wordStr, orgCountMap);
			}

			token = token.next;
		}

		JSONArray nameArray = buildResult(nameCountMap, weightMap);
		JSONArray orgArray = buildResult(orgCountMap, weightMap);

		if (!nameArray.isEmpty()) {
			JSONObject jsonObject = new JSONObject();
			jsonObject.put("data_type", "ne_name");
			jsonObject.put("datas", nameArray);
			dataMap.put(field.concat(FIELD_SUFFIX_NAME), jsonObject.toJSONString());
			ret = true;
		}

		if (!orgArray.isEmpty()) {
			JSONObject jsonObject = new JSONObject();
			jsonObject.put("data_type", "ne_org");
			jsonObject.put("datas", orgArray);
			dataMap.put(field.concat(FIELD_SUFFIX_ORG), jsonObject.toJSONString());
			ret = true;
		}

		return ret;

	}

	/**
	 * Maps a comma-separated list of source fields to their person-name result
	 * field names (each suffixed with "_names").
	 */
	public List<String> genNameFieldList(String sourceFields){
		return genFieldListBySuffix(sourceFields, FIELD_SUFFIX_NAME);
	}

	/**
	 * Maps a comma-separated list of source fields to their organization result
	 * field names (each suffixed with "_orgs").
	 */
	public List<String> genOrgFieldList(String sourceFields){
		return genFieldListBySuffix(sourceFields, FIELD_SUFFIX_ORG);
	}

	/** Splits on commas, trims each field, and appends the given suffix. */
	private List<String> genFieldListBySuffix(String sourceFields, String suffix) {
		ArrayList<String> fieldsList = new ArrayList<>();
		String[] fields = sourceFields.split(",");
		for(String field: fields) {
			fieldsList.add(field.trim().concat(suffix));
		}
		return fieldsList;
	}

	/**
	 * Builds a JSON array of {str, count, weight} objects from the entity count
	 * map, attaching the keyword weight where available (0 otherwise).
	 */
	private JSONArray buildResult(Map<String, Integer> nameCountMap, Map<String, Double> nameWeightMap) {
		JSONArray array = new JSONArray();
		for (Entry<String, Integer> entry : nameCountMap.entrySet()) {
			JSONObject jsonObject = new JSONObject();
			jsonObject.put("str", entry.getKey());
			jsonObject.put("count", entry.getValue());
			jsonObject.put("weight", nameWeightMap.getOrDefault(entry.getKey(), 0.0));
			array.add(jsonObject);
		}

		return array;
	}

}
